# -*- coding: utf-8 -*-
"""
Created on Wed Dec 21 10:53:12 2022

@author: janmo

Compares baseline and treatment participants of the ``lottery_risk_game``
export: per-variable group means, standard deviations, an OLS regression on
the treatment dummy and a Mann-Whitney U test, first on the full sample and
then on a sample restricted to treatment participants whose hidden-feature
indicator columns sum to at least a given threshold.
"""

import math
import random
import statistics

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from scipy.stats import mannwhitneyu
from scipy.stats import wilcoxon
from sklearn.linear_model import LogisticRegression

# Prefix shared by the player-level columns used in this script.
PLAYER_PREFIX = 'lottery_risk_game.1.player.'
TREATMENT_LABEL = PLAYER_PREFIX + 'treatment'   # 'baseline' / 'treatment'
BDM = PLAYER_PREFIX + 'BDM'
FS_FLAG = 'conducted_dec_fs?'

# Positional column ranges in the raw export.
# NOTE(review): presumably 55-67 are the survey items and 68-80 hold one
# indicator per hidden feature -- confirm against the CSV header.
SURVEY_COLUMNS = range(55, 68)
HIDDEN_FEATURE_COLUMNS = slice(68, 81)

SECTION_RULE = '________________________________________________'


def cut_feature_string(string):
    """Return *string* without its first 27 characters.

    27 is the length of ``'lottery_risk_game.1.player.'``, so for the
    player-level columns this leaves the plain feature name.
    """
    return string[len(PLAYER_PREFIX):]


def fit_treatment_ols(data: pd.DataFrame, var: str):
    """Fit ``var ~ const + treatment`` by OLS on *data* and return the result."""
    regressors = sm.add_constant(data['treatment'])
    return sm.OLS(data[var], regressors).fit()


def report_variable(data: pd.DataFrame, var: str, group_col: str,
                    ols_separator):
    """Print the group comparison of *var* and return the fitted OLS result.

    Prints, in order: group means and standard deviations (grouped by
    *group_col*), the OLS summary of *var* on the treatment dummy,
    ``print(*ols_separator)``, and the Mann-Whitney U test between the
    ``treatment == 0`` and ``treatment == 1`` rows of *data*.
    """
    grouped = data.groupby(group_col)[var]
    print('# - - - - - -', var, '- - - - - - - #')
    print('Means:')
    print(grouped.mean())
    print(' ')
    print('Standard deviations:')
    print(grouped.std())
    print(' ')
    print('OLS:')
    model = fit_treatment_ols(data, var)
    print(model.summary())
    print(*ols_separator)
    # Mann-Whitney U test on the same rows the statistics above are based on.
    baseline_values = data.loc[data['treatment'] == 0, var]
    treatment_values = data.loc[data['treatment'] == 1, var]
    print(mannwhitneyu(baseline_values, treatment_values))
    print(SECTION_RULE, "\n", "\n", "\n", "\n")
    return model


def flag_min_hidden_features(hidden_feature_counts: pd.Series,
                             threshold) -> pd.Series:
    """Return 1 where the count is at least *threshold*, else 0.

    A NaN count compares as False and is therefore flagged 0.
    """
    return (hidden_feature_counts >= threshold).astype(int)


# Read in and select Data
Data_A = pd.read_csv('Final/all_apps_wide-2022-12-20 (1).csv')
Data_A = Data_A.iloc[2:, :]
Data_A = Data_A.loc[
    Data_A['participant._current_app_name'].notna() & Data_A[BDM].notna(), :
]
# .copy() so the columns added below are written to an independent frame
# rather than to a slice of the raw data.
Data_A = Data_A.loc[Data_A['participant.id_in_session'] != 215].copy()
# Treatment dummy: 0 for 'baseline', 1 for every other label.
Data_A['treatment'] = (Data_A[TREATMENT_LABEL] != 'baseline').astype(int)

# Create list with plain feature names
survey_features = Data_A.columns[55:68]
survey_features_cut = list(map(cut_feature_string, survey_features))

for i, column_name in zip(SURVEY_COLUMNS, survey_features_cut):
    fig, axes = plt.subplots(figsize=(16, 8))
    sns.histplot(
        Data_A.iloc[:, i], bins=10, kde=True, stat="density", linewidth=0
    ).set_title(column_name, fontsize=20, weight='bold')
    axes.set_xlabel(column_name, fontsize=15, weight='bold')
    axes.set_ylabel('Density', fontsize=15, weight='bold')
    plt.show()
    print(column_name)
    print(Data_A.iloc[:, i].describe())
    print('- - -')

### Aggregate constructs
Data_A['Cognitive_trust'] = (
    Data_A[PLAYER_PREFIX + 'algo_expert']
    + Data_A[PLAYER_PREFIX + 'algo_knowledge']
) / 2
Data_A['Emotional_trust'] = (
    Data_A[PLAYER_PREFIX + 'feel_secure']
    + Data_A[PLAYER_PREFIX + 'feel_comfortable']
    + Data_A[PLAYER_PREFIX + 'feel_content']
) / 3
Data_A['Privacy'] = (
    Data_A[PLAYER_PREFIX + 'privacy_1'] + Data_A[PLAYER_PREFIX + 'privacy_2']
) / 2

# Select treatment participants
Data_A_treatment = Data_A.loc[Data_A[TREATMENT_LABEL] == 'treatment']

# Variable list
var_list = ['lottery_risk_game.1.player.BDM',
            'lottery_risk_game.1.player.perceived_accuracy',
            'lottery_risk_game.1.player.perceived_rmse',
            'Privacy',
            'Cognitive_trust',
            'Emotional_trust',
            'lottery_risk_game.1.player.transparency',
            'lottery_risk_game.1.player.power']

# Mean and stdev of variables - Baseline and Treatment
for var in var_list:
    h1_est_1 = report_variable(Data_A, var, TREATMENT_LABEL, ("\n", "\n"))

# Mean and stdev of variables - Baseline and Treatment min. x

# Prepare treatment participants
# Check which participants did not choose features with threshold
# One list per participant holding the values of the hidden-feature columns.
dummy_metalist = Data_A.iloc[:, HIDDEN_FEATURE_COLUMNS].values.tolist()
sum_hidden_features_list = [sum(row) for row in dummy_metalist]
hidden_feature_counts = pd.Series(sum_hidden_features_list, index=Data_A.index)

# Assign the whole column at once: the former element-wise
# ``Data_A[col].iloc[i] = ...`` is chained assignment, which does not write
# through to Data_A under pandas copy-on-write.
Data_A[FS_FLAG] = flag_min_hidden_features(hidden_feature_counts, 1)

print(len(Data_A.loc[Data_A[FS_FLAG] == 1]) / len(Data_A_treatment))

is_baseline = Data_A[TREATMENT_LABEL] == 'baseline'
is_treatment = Data_A[TREATMENT_LABEL] == 'treatment'
Data_A_min = Data_A.loc[is_baseline | (is_treatment & (Data_A[FS_FLAG] == 1))]

for var in var_list:
    # The Mann-Whitney test now uses Data_A_min as well; it previously read
    # the unrestricted Data_A and so repeated the full-sample test.
    h1_est_1 = report_variable(
        Data_A_min, var, 'treatment', (SECTION_RULE, "\n", "\n", "\n", "\n")
    )

print(Data_A_min[FS_FLAG].sum(), 'participants involved')

### Median of # hidden features
for quantile in [0.5, 0.6, 0.7, 0.8, 0.9]:
    print(quantile, 'quantile:', np.quantile(sum_hidden_features_list, quantile))

# Visualization BDM for each treatment threshold
thresholds = list(range(0, 13))
BDM_mean_per_threshold = []
for threshold in thresholds:
    print(threshold)
    Data_A[FS_FLAG] = flag_min_hidden_features(hidden_feature_counts, threshold)
    Data_A_min = Data_A.loc[is_treatment & (Data_A[FS_FLAG] == 1)]
    BDM_value = Data_A_min[BDM].mean()
    BDM_mean_per_threshold.append(BDM_value)

fig, axes = plt.subplots(figsize=(12, 5))
# x/y are passed by keyword (required by seaborn >= 0.12) and the x axis shows
# the thresholds actually evaluated.
# NOTE(review): the plot previously labelled these points 1..13 although the
# thresholds are 0..12 -- confirm which range was intended.
sns.lineplot(x=thresholds, y=BDM_mean_per_threshold, ax=axes)
axes.set_xlabel('# minimum hidden features', fontsize=15, weight='bold')
axes.set_ylabel('Schadensrate', fontsize=15, weight='bold')
axes.set_title('Treatment WTP for different # of hidden features',
               fontsize=15, weight='bold')
plt.show()

# Backup
backup_vars = ['lottery_risk_game.1.player.BDM',
               'lottery_risk_game.1.player.perceived_accuracy',
               'lottery_risk_game.1.player.perceived_rmse',
               'Privacy',
               'Cognitive_trust',
               'Emotional_trust']

# Missing values per treatment group (0 = baseline, 1 = treatment).
missing_per_group = Data_A[backup_vars].isna().groupby(Data_A['treatment']).sum()
print(missing_per_group)

# Plain mean / std / OLS output per variable, without section headers.
for var in backup_vars:
    print(Data_A.groupby(TREATMENT_LABEL)[var].mean())
    print(Data_A.groupby(TREATMENT_LABEL)[var].std())
    h1_est_1 = fit_treatment_ols(Data_A, var)
    print(h1_est_1.summary())